5 Metagenomic data

Species-specific community analyses conducted to generate the data included in these analyses can be found in the annex.

5.1 Library complexity

Nonpareil estimate of the metagenomic complexity after removing host DNA.

all_data %>%
    select(dataset,Extraction,nonpareil,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(nonpareil), sd(nonpareil))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of breadth of host genome coverage")
tinytable_s8xeskjrc26t7rqxd1rw
Mean and standard deviation of breadth of host genome coverage
Taxon ZYMO DREX EHEX
Amphibian 0.9±0.1 0.8±0.1 0.8±0.1
Reptile 0.9±0.1 0.9±0.0 0.9±0.1
Mammal 0.8±0.2 0.8±0.1 0.7±0.3
Bird 0.9±0.1 1.0±0.0 0.8±0.4
Control 0.0±0.0 0.5±0.6 0.5±0.7
all_data %>%
    select(dataset,Extraction,nonpareil,Taxon,Species) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=nonpareil, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Nonpareil completeness",x="Extraction method")

all_data  %>%
    select(dataset,Extraction,Sample,Species,nonpareil,Taxon) %>%
    unique() %>%
    mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(nonpareil ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_7l6aspca23l1ntyvl1qh
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 0.885552853 0.03450229 25.6664948 40.89542 7.753071e-27
fixed NA ExtractionDREX 0.005536892 0.04336611 0.1276779 48.00020 8.989373e-01
fixed NA ExtractionEHEX -0.112193866 0.04336611 -2.5871326 48.00020 1.276294e-02
ran_pars Sample sd__(Intercept) 0.059565487 NA NA NA NA
ran_pars Species sd__(Intercept) 0.035030813 NA NA NA NA
ran_pars Residual sd__Observation 0.150224598 NA NA NA NA

5.2 Alpha diversity

Variance partitioning metrics are derived from community_analysis.Rmd.

alpha_data <- list.files(path = "results", pattern = "^alpha_.*\\.tsv$", full.names = TRUE) %>%
  map_df(~ read_tsv(.)) %>%
  left_join(all_data,by= join_by(dataset==dataset))
alpha_data %>%
    pivot_longer(!c(dataset,Library,Species,Taxon,Sample,Extraction), names_to = "metric", values_to = "value") %>%
    filter(metric %in% c("richness","neutral","phylogenetic")) %>%
    mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic"))) %>%
    mutate(Taxon=factor(Taxon,levels=c("Amphibian","Reptile","Mammal"))) %>%
    mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=value, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(metric ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Diversity",x="Extraction method")

5.2.1 Richness

alpha_data %>%
    select(dataset,Extraction,Sample,Species,richness,Taxon) %>%
    unique() %>%
    mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
    lmerTest::lmer(richness ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_q7mqr3gxbf2zxfhcmyk6
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 59.44444444 10.715517 5.5475108 10.12322 0.000234351
fixed NA ExtractionDREX 0.05555556 4.453352 0.0124750 34.97883 0.990117532
fixed NA ExtractionEHEX 1.38257472 4.545332 0.3041746 35.13134 0.762789322
ran_pars Sample sd__(Intercept) 16.00359332 NA NA NA NA
ran_pars Species sd__(Intercept) 28.56742258 NA NA NA NA
ran_pars Residual sd__Observation 13.36005511 NA NA NA NA

5.2.2 Neutral

alpha_data %>%
    select(dataset,Extraction,Sample,Species,neutral,Taxon) %>%
    unique() %>%
    mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
    lmerTest::lmer(neutral ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_6unjkqsxoq49bsk1qnc7
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 29.5413181 5.139162 5.7482754 9.895197 0.000193227
fixed NA ExtractionDREX -2.1102758 1.922343 -1.0977625 35.042971 0.279794105
fixed NA ExtractionEHEX -0.3422373 1.962694 -0.1743712 35.152526 0.862574163
ran_pars Sample sd__(Intercept) 8.6429526 NA NA NA NA
ran_pars Species sd__(Intercept) 13.5543072 NA NA NA NA
ran_pars Residual sd__Observation 5.7670282 NA NA NA NA

5.2.3 Phylogenetic

alpha_data %>%
    select(dataset,Extraction,Sample,Species,phylogenetic,Taxon) %>%
    unique() %>%
    mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
    lmerTest::lmer(phylogenetic ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_kziiw7b0zebbu8c0pfoj
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 4.7721984 0.4324985 11.034024 9.653749 8.685117e-07
fixed NA ExtractionDREX 0.1521100 0.1403528 1.083769 35.015284 2.858737e-01
fixed NA ExtractionEHEX 0.1852709 0.1433723 1.292236 35.056809 2.047290e-01
ran_pars Sample sd__(Intercept) 1.2179889 NA NA NA NA
ran_pars Species sd__(Intercept) 0.9236345 NA NA NA NA
ran_pars Residual sd__Observation 0.4210584 NA NA NA NA

5.3 Microbial complexity recovery

all_data %>%
    select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon) %>%
    mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction*100))) %>%
    mutate(damr=ifelse(is.na(damr),0,damr)) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(damr), sd(damr))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of breadth of host genome coverage")
tinytable_i7lgsnsmqfgw8lk0mjlr
Mean and standard deviation of breadth of host genome coverage
Taxon ZYMO DREX EHEX
Amphibian 0.8±0.4 0.8±0.3 0.8±0.3
Reptile 0.9±0.1 1.0±0.0 1.0±0.0
Mammal 0.8±0.1 0.9±0.1 0.8±0.3
Bird 0.6±0.4 0.5±0.4 0.6±0.4
Control 1.0±0.0 1.0±0.0 1.0±0.0
all_data %>%
    select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon,Sample,Species) %>%
    mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction*100))) %>%
    mutate(damr=ifelse(is.na(damr),0,damr)) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=damr, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Domain-adjusted mapping rate",x="Extraction method")

all_data  %>%
    select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon, Sample, Species) %>%
    mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction*100))) %>%
    mutate(damr=ifelse(is.na(damr),0,damr)) %>%
    unique() %>%
    mutate(Extraction=factor(Extraction,levels=c("ZYMO","DREX","EHEX"))) %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(damr ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_mlh11cxhjaziae2tlowv
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 0.78271978 0.06660971 11.7508366 15.45687 4.117675e-09
fixed NA ExtractionDREX 0.03348421 0.03977983 0.8417385 130.19072 4.014779e-01
fixed NA ExtractionEHEX 0.02228400 0.03999367 0.5571881 130.24817 5.783552e-01
ran_pars Sample sd__(Intercept) 0.12024342 NA NA NA NA
ran_pars Species sd__(Intercept) 0.19047224 NA NA NA NA
ran_pars Residual sd__Observation 0.20283813 NA NA NA NA

5.4 Variance partitioning

Variance partitioning metrics are derived from community_analysis.Rmd.

variance_data <- list.files(path = "results", pattern = "^var_.*\\.tsv$", full.names = TRUE) %>%
  map_df(~ read_tsv(.))

variance_data %>% summarise(mean=mean(r2),sd=sd(r2))
# A tibble: 1 × 2
   mean     sd
  <dbl>  <dbl>
1 0.102 0.0768
variance_data %>%
    left_join(all_data %>% select(Species,Taxon) %>% unique(),by=join_by(species==Species)) %>%
    mutate(metric=factor(metric,levels=c("phylogenetic","neutral","richness"))) %>%
    mutate(Taxon=factor(Taxon,levels=c("Amphibian","Reptile","Mammal"))) %>%
    ggplot(aes(x=r2,y=metric)) +
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter(aes(color=species))+
        scale_color_manual(values=vertebrate_colors) +
        xlim(0,0.5)+
        theme_minimal() +
        labs(y="Diversity metric",x="Explained variance")

variance_data %>%
    group_by(metric) %>%
    summarise(mean=mean(r2),sd=sd(r2)) %>%
    tt()
tinytable_4lsdabickm4z3m2z4u0s
metric mean sd
neutral 0.06201935 0.04744380
phylogenetic 0.07381277 0.06884687
richness 0.16970818 0.06626388

5.5 Combined community analysis

species="combined"
genus=species

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset)

read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0134/DMB0134_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0134/DMB0134_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0134/DMB0134_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0134/DMB0134.tree.gz", "data/DMB0134.tree.gz")
genome_tree <- read_tree("data/DMB0134.tree.gz")

5.5.1 Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

5.5.2 Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>%
    mutate(Taxon=factor(Taxon,levels=c("Amphibian",
                                       "Reptile",
                                       "Mammal",
                                       "Bird",
                                       "Control"))) %>%
    mutate(Extraction=factor(Extraction,levels=c("ZYMO",
                                                 "DREX",
                                                 "EHEX"))) %>%
    mutate(Species=factor(Species,levels=c("Calotriton asper",
                                           "Lissotriton helveticus",
                                           "Salamandra atra",
                                           "Chalcides striatus",
                                           "Natrix astreptophora",
                                           "Podarcis muralis",
                                           "Plecotus auritus",
                                           "Sciurus carolinensis",
                                           "Trichosurus vulpecula",
                                           "Geospizopsis unicolor",
                                           "Perisoreus infaustus",
                                           "Zonotrichia capensis",
                                           "Extraction control",
                                           "Library control"))) %>%
    filter(Taxon != "Control") %>%
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Taxon + Species + Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.title=element_blank(),
                panel.spacing = unit(0, "lines"))

ggsave(paste0("figures/barplot_",genus,".pdf"))
genome_counts_NMDS <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="dist") %>%
            metaMDS(.,trymax = 999, k=2, trace=0) %>%
            vegan::scores() %>%
            as_tibble(., rownames = "dataset") %>%
            left_join(sample_metadata, by = join_by(dataset == dataset)) %>%
            filter(Taxon != "Control") %>%
            group_by(Sample) %>%
            mutate(sample_x=mean(NMDS1), sample_y=mean(NMDS2))

genome_counts_NMDS %>%
            ggplot(., aes(x=NMDS1,y=NMDS2, color=Species, shape=Extraction)) +
                scale_color_manual(values=vertebrate_colors) +
                geom_point(size=3, alpha=0.8) +
                geom_segment(aes(x=sample_x, y=sample_y, xend=NMDS1, yend=NMDS2), alpha=0.2) +
                theme_classic() +
                theme(legend.position="right", legend.box="vertical") +
                guides(color=guide_legend(title="Species"))